# coding:utf8
import pycurl
import urllib
import re
from utils import kill_captcha
import StringIO
import random
from bs4 import BeautifulSoup
import json
import table
import requests


def curl(url, data='', cookie='', debug=False):  # fetch helper [get, post]
    """Fetch *url* with pycurl and return the raw response body.

    Args:
        url: target URL.
        data: dict of POST fields; falsy means a plain GET request.
        cookie: truthy to (re)write the shared cookie jar file; the jar is
            always *read*, so later calls reuse an established session.
        debug: unused, kept for backward compatibility with callers.

    Returns:
        Response body as a str (gzip content is decoded by libcurl).
    """
    buf = StringIO.StringIO()
    c = pycurl.Curl()
    c.setopt(c.URL, url)
    c.setopt(c.REFERER, 'http://gsxt.scaic.gov.cn/')
    if cookie:
        # store cookies received in this response into the shared jar
        c.setopt(c.COOKIEJAR, "cookie_file_name1")
    # always replay previously saved cookies
    c.setopt(c.COOKIEFILE, "cookie_file_name1")
    c.setopt(pycurl.FOLLOWLOCATION, True)
    if data:
        c.setopt(c.POSTFIELDS, urllib.urlencode(data))
    c.setopt(pycurl.ENCODING, 'gzip')
    c.setopt(c.HTTPHEADER, ['Host:wsgs.fjaic.gov.cn', 'Upgrade-Insecure-Requests:1',
                            'User-Agent:Googlebot/2.1 (+http://www.googlebot.com/bot.html)',
                            'Origin:http://gsxt.scaic.gov.cn'])
    c.setopt(c.WRITEDATA, buf)
    try:
        c.perform()
    finally:
        # release the handle even when the transfer raises (was leaked before)
        c.close()
    return buf.getvalue()


def verify(name, **args):
    """Search the Fujian registry for *name* and return the detail page HTML.

    Retries forever on transient network/parse failures. Sets the
    module-global ``detail_url`` as a side effect (read later by run()).

    Args:
        name: company name keyword to search for.
        **args: pass ``type=1`` to get a ``(detail_html, detail_url)`` tuple.

    Returns:
        Detail page HTML (or the tuple above), or '' when nothing matched.
    """
    global detail_url
    while 1:
        try:
            code = curl('http://wsgs.fjaic.gov.cn/creditpub/home', cookie=1)
            # BUG fixed: "%d" % random.random() always truncated to 0, so the
            # cache-buster was constant; draw an integer instead.
            verify_url = "http://wsgs.fjaic.gov.cn/creditpub/captcha?preset=str-01,math-01&ra=%d" % random.randint(0, 999999)
            verify_image = curl(verify_url)
            # verify = kill_captcha(verify_image,'fj','jpeg')
            code = re.findall('code:"(.+?)"', code.replace(' ', ''))[0]
            # NOTE(review): captcha answer is hard-coded to 123 (solver above is
            # commented out) -- presumably the site accepts anything; confirm.
            data = {"searchType": 1, "captcha": 123, "session.token": code, "condition.keyword": name}
            search_url = 'http://wsgs.fjaic.gov.cn/creditpub/search/ent_info_list'
            html = curl(search_url, data)
            try:
                detail_url = \
                re.findall('<a href="(http://wsgs\.fjaic\.gov\.cn/creditpub/notice/view\?uuid=.+?)"', html)[0]
            except IndexError:
                # no search hit for this keyword
                return ''
            detail_html = curl(detail_url)
            if args.get('type') == 1:
                return (detail_html, detail_url)
            return detail_html
        except Exception:
            # transient failure: retry; the old bare "except:" also swallowed
            # KeyboardInterrupt/SystemExit, making the loop unkillable
            continue


def run(detail_html, **args):
    """Parse a company detail page (plus its annual reports) into a dict.

    Reads the module-global ``detail_url`` (set by verify()/search2()/search3())
    to derive the annual-report (tab=02) URL.

    Args:
        detail_html: HTML of the company detail page.
        **args: ``type=1`` to also return raw-html and company-url records;
            ``searchkey`` / ``companyUrl`` are echoed into those records.

    Returns:
        alldata dict, or ``(html_source, alldata, companyUrl)`` when type == 1.

    Raises:
        IndexError/KeyError may propagate from the shareholder-detail parse.
    """
    # Section lists default to empty when the page lacks the table; previously
    # this was done with "try: print x / except: x = []" blocks after the loop.
    basicList = []
    shareHolderList = []
    personList = []
    alterList = []
    filiationList = []
    liquidationList = []
    abnormalOperation = []

    tables = re.findall('<table[\s\S]+?</table>', detail_html)
    for j in tables:
        word = re.findall('<th colspan="\d+?">(.+?)</th>', j)
        if '股东信息' in j:
            shareHolderList = table.index('股东信息', j)
            if shareHolderList:
                for i in shareHolderList:
                    if 'href' in i['shareHolderdetail']:
                        # fetch the shareholder detail page for contribution info
                        share_url = re.findall('href="(.+?)"', i['shareHolderdetail'])[0]
                        html = curl(share_url).replace(' ', '')
                        subConam = re.findall('invt\.subConAm="(.+?)"', html)
                        regCapCur = re.findall('invt\.conForm="(.+?)"', html)
                        i['shareHolderdetail'] = share_url
                        i['subConam'] = subConam[0] if subConam else ''
                        i['regCapCur'] = regCapCur[0] if regCapCur else ''
                    else:
                        i['shareHolderdetail'] = ''
                        i['subConam'] = ''
                        i['regCapCur'] = ''
                    # never provided by this page; always blank
                    i['conDate'] = ''
                    i['fundedRatio'] = ''
                    i['country'] = ''
        try:
            if '基本信息' == word[0]:
                basicList = table.index(word[0].replace(' ', ''), j)
            if '主要人员信息' == word[0]:
                personList = table.index(word[0].replace(' ', ''), j)
            if '变更信息' == word[0]:
                alterList = table.index(word[0].replace(' ', ''), j)
            if '分支机构信息' == word[0]:
                filiationList = table.index(word[0].replace(' ', ''), j)
            if '清算信息' == word[0]:
                liquidationList = table.index(word[0].replace(' ', ''), j)
            if '经营异常' == word[0] or '经营异常信息' == word[0]:
                abnormalOperation = table.index(word[0].replace(' ', ''), j)
        except Exception:
            # headerless table or a section-parser failure: skip this table
            continue

    # Sections this province's site does not expose; kept for schema parity
    # with the other provinces' scrapers.
    punishBreakList = []
    punishedList = []
    alidebtList = []
    entinvItemList = []
    frinvList = []
    frPositionList = []
    caseInfoList = []
    sharesFrostList = []
    sharesImpawnList = []
    morDetailList = []
    morguaInfoList = []

    # Annual reports live under tab=02 of the same detail URL.
    report_page = curl(detail_url.replace('tab=01', 'tab=02'))
    report_links = re.findall(
        '"(http://wsgs\.fjaic\.gov\.cn/creditpub/notice/view_annual.+?)" target="_blank">(\d+)',
        report_page)
    yearReportList = []
    yearList = []
    for url, year in report_links:
        html = curl(url)
        # BUG fixed: these were only assigned inside the table loop, so a year
        # whose page matched no table either raised NameError (first year) or
        # silently inherited the previous year's data.
        report_basic = {}
        report_website = {}
        report_assetsInfo = {}
        report_investorInformations = []
        report_equityChangeInformations = []
        report_changeRecords = []
        for j in re.findall('<table[\s\S]+?</table>', html):
            if '企业基本信息' in j:
                report_basic = table.report_basic(j)
            if '网站或网店信息' in j:
                report_website = table.report_website(j)
            if '企业资产状况信息' in j:
                report_assetsInfo = table.report_assetsInfo(j)
            if '股东及出资信息' in j:
                report_investorInformations = table.report_investorInformations(j)
            if '股权变更信息' in j:
                report_equityChangeInformations = table.report_equityChangeInformations(j)
            if '修改记录' in j:
                report_changeRecords = table.report_changeRecords(j)
        yearReportList.append({"year": year, "baseInfo": report_basic, "website": report_website,
                               "investorInformations": report_investorInformations,
                               "assetsInfo": report_assetsInfo,
                               "equityChangeInformations": report_equityChangeInformations,
                               "changeRecords": report_changeRecords})
        yearList.append({"year": year, "html": html})

    alldata = {'province': 'fj', "abnormalOperation": abnormalOperation, "basicList": basicList,
               "shareHolderList": shareHolderList, "personList": personList, "punishBreakList": punishBreakList,
               "punishedList": punishedList, "alidebtList": alidebtList, "entinvItemList": entinvItemList,
               "frinvList": frinvList, "frPositionList": frPositionList, "alterList": alterList,
               "filiationList": filiationList, "caseInfoList": caseInfoList, "sharesFrostList": sharesFrostList,
               "sharesImpawnList": sharesImpawnList, "morDetailList": morDetailList, "morguaInfoList": morguaInfoList,
               "liquidationList": liquidationList, "yearReportList": yearReportList}
    if args.get('type') == 1:
        html_source = {"province": "fj", "type": 0, "html": detail_html, "keyword": args.get('searchkey', "none"),
                       "companyName": basicList[0]['enterpriseName'], "yearList": yearList}
        companyUrl = {'url': args.get('companyUrl'), "method": "get", "companyName": basicList[0]['enterpriseName'],
                      "province": "fj"}
        return (html_source, alldata, companyUrl)
    return alldata


def search(key):
    """Look up *key* in the registry; return parsed data, or {} on no hit."""
    detail_html = verify(key)
    if not detail_html:
        return {}
    return run(detail_html)


def search2(key):
    """Search by company name; return ``(html_source, alldata, companyUrl)``.

    Side effect: sets the module-global ``detail_url`` consumed by run().

    Returns:
        The 3-tuple from ``run(..., type=1)``, or ``()`` when nothing matched.
    """
    global detail_url
    result = verify(key, type=1)
    # BUG fixed: verify() returns '' (not a tuple) when the search has no hit;
    # the old code then crashed with IndexError on result[1].
    if not result:
        return ()
    html, url = result
    detail_url = url
    if html:
        return run(html, searchkey=key, type=1, companyUrl=url)
    return ()


def search3(data):
    """Parse a company straight from a stored record with 'url'/'companyName'."""
    global detail_url
    target = data.get('url')
    detail_url = target
    page = curl(target)
    company = data.get("companyName", "")
    if not (page and company):
        raise Exception("error")
    return run(page, searchkey=company, type=1, companyUrl=target)


if __name__ == "__main__":
    # print json.dumps(search2('晋江市福港贸易有限公司'), ensure_ascii=False, indent=4)
    # print json.dumps(search3('http://wsgs.fjaic.gov.cn/creditpub/notice/view?uuid=2fBVdB4sX1E76PWEfMlquNT6SUcd6ZPF&tab=01'),ensure_ascii=False,indent=4)
    # from pymongo import MongoClient
    # client = MongoClient('192.168.31.121')
    # db = client.crawler_company_name
    # co = db.companyName
    # for i in co.find({"province":"fj"}):
    #     num += 1
    #     print i
    #     name  = i['companyName']
    #     print '*'*100
    #     print name
    #     print '*'*100
    #     print json.dumps(search2(name),ensure_ascii=False,indent=4)
    #     print '#'*100
    #     print num
    #     print '#'*100
    com_info = { "companyName" : "周宁县战网网络科技有限公司", "url" : "http://wsgs.fjaic.gov.cn/creditpub/notice/view?uuid=QFPYsb7Ey7VkNV9AKvl9KgRrUQ7LMVI5&tab=01", "province" : "fj", "method" : "get" }
    res_data = search3(com_info)
    print json.dumps(res_data,ensure_ascii=False,indent=4)